# Loading Packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(bulletxtrctr)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl_init' failed, running with rgl.useNULL = TRUE
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
library(x3ptools)
library(ggplot2)
library(readr)
library(purrr)
library(stringr)
library(tidyr)
load("Ground_Truth_Data.RData")
#For Html Purposes
#Checking scan orientation
image(Phoenix$x3p[[1]])
image(Phoenix$x3p[[10]])
image(Phoenix$x3p[[100]])
image(Phoenix$x3p[[200]])
Notes Bullets from the same barrel are considered matches. Bullets from different barrels are not considered matches. In case a bullet is unknown, we do not know whether there is a match or not.
Lands from the same bullet are considered matches if they are from the same barrel. In case the lands are from bullets from different barrels they are not matches.
Questions to be Answered There are a few key relationships we are trying to determine when comparing two Barrel-Bullet-Land IDs: - Matches between questioned and known bullets - Matches between questioned and other questioned bullets - Same lands between different bullets from the same barrel. - Same lands between any questioned bullets with any other bullet.
# Determining Ground Truth
# These features were obtained from the 2019 summer Bullet Project
# Features were extracted from Bullet-Barrel-Land scans using the x3ptools & bulletxtrctr packages
features <- Phoenix_Comparisons %>%
select(-cms2_per_mm, -cms_per_mm, -lag_mm, -length_mm,
-matches_per_mm, -mismatches_per_mm, -non_cms_per_mm, -rfscore)
features <- features %>% mutate(
samesource = ifelse(BarrelA==BarrelB & BulletA == BulletB, LandA==LandB, NA),
samebarrel = BarrelA == BarrelB,
samebarrel = ifelse((BarrelA == "Unknown" | BarrelB == "Unknown") & (BulletA != BulletB), NA, samebarrel),
samesource = ifelse(!samebarrel, FALSE, samesource)
)
features <- features %>% mutate(
comparison = paste0(pmin(Bullet1, Bullet2)," vs ", pmax(Bullet1, Bullet2))
)
Explanation We have created three new features: samesource - 1/6 between Barrel-Bullet-Land Scores samebarrel - Same Barrels when being compared or not the same Barrels when being compared comparison - Essential an ID column saying which Barrel-Bullet-Lands are being compared
head(features, 10)
## # A tibble: 10 x 24
## Bullet1 Bullet2 ccf cms cms2 D lag length matches mismatches
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A9-B1-1 A9-B1-1 1 39 19 0 0 2821 39 0
## 2 A9-B1-1 A9-B1-2 0.504 2 3 0.0320 -575 2716 5 31
## 3 A9-B1-1 A9-B1-3 0.337 2 4 0.0341 579 2663 7 28
## 4 A9-B1-1 A9-B1-4 0.339 3 2 0.0428 -386 2551 7 28
## 5 A9-B1-1 A9-B1-5 0.227 2 2 0.0341 497 2668 7 31
## 6 A9-B1-1 A9-B1-6 0.292 2 2 0.0303 -301 2684 6 30
## 7 A9-B1-1 A9-B2-1 0.539 2 2 0.0283 -640 2778 5 34
## 8 A9-B1-1 A9-B2-2 0.272 4 5 0.0382 -580 2718 9 30
## 9 A9-B1-1 A9-B2-3 0.365 3 2 0.0335 488 2821 3 35
## 10 A9-B1-1 A9-B2-4 0.187 1 0 0.0268 -4 2821 2 18
## # … with 14 more variables: non_cms <dbl>, overlap <dbl>, rough_cor <dbl>,
## # sd_D <dbl>, sum_peaks <dbl>, BarrelA <chr>, BarrelB <chr>,
## # BulletA <chr>, BulletB <chr>, LandA <chr>, LandB <chr>,
## # samesource <lgl>, samebarrel <lgl>, comparison <chr>
features <- features %>% select(-cms2, -lag, -length) # Cleaning for future exploration
head(features, 10)
## # A tibble: 10 x 21
## Bullet1 Bullet2 ccf cms D matches mismatches non_cms overlap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A9-B1-1 A9-B1-1 1 39 0 39 0 0 1
## 2 A9-B1-1 A9-B1-2 0.504 2 0.0320 5 31 10 0.788
## 3 A9-B1-1 A9-B1-3 0.337 2 0.0341 7 28 12 0.842
## 4 A9-B1-1 A9-B1-4 0.339 3 0.0428 7 28 11 0.849
## 5 A9-B1-1 A9-B1-5 0.227 2 0.0341 7 31 10 0.871
## 6 A9-B1-1 A9-B1-6 0.292 2 0.0303 6 30 10 0.888
## 7 A9-B1-1 A9-B2-1 0.539 2 0.0283 5 34 13 0.770
## 8 A9-B1-1 A9-B2-2 0.272 4 0.0382 9 30 14 0.787
## 9 A9-B1-1 A9-B2-3 0.365 3 0.0335 3 35 22 0.827
## 10 A9-B1-1 A9-B2-4 0.187 1 0.0268 2 18 13 0.999
## # … with 12 more variables: rough_cor <dbl>, sd_D <dbl>, sum_peaks <dbl>,
## # BarrelA <chr>, BarrelB <chr>, BulletA <chr>, BulletB <chr>,
## # LandA <chr>, LandB <chr>, samesource <lgl>, samebarrel <lgl>,
## # comparison <chr>
Notes Part 2 We are going to calculate: - “bestlandmatch” using SAM scores to determine which lands match the best between any two bullets
“sameland_pred” is to check if the SAM score is significantly higher than the non-matching lands(based on a bootsrap)
“samebarrel_pred” is the maximum for each bullet to bulelt comparison
f2nest <- features %>%
group_by(BarrelA, BulletA, BarrelB, BulletB) %>% nest()
f3nest <- f2nest %>% mutate(
sam_ccf = data %>% purrr::map_dbl(.f=function(d)
max(bulletxtrctr::compute_average_scores(d$LandA, d$LandB, d$ccf, addNA = TRUE)))
)
head(f3nest)
## # A tibble: 6 x 6
## # Groups: BarrelA, BarrelB, BulletA, BulletB [6]
## BarrelA BarrelB BulletA BulletB data sam_ccf
## <chr> <chr> <chr> <chr> <list<df[,17]>> <dbl>
## 1 A9 A9 B1 B1 [36 × 17] 1
## 2 A9 A9 B2 B1 [36 × 17] 0.883
## 3 A9 A9 B3 B1 [36 × 17] 0.771
## 4 C8 A9 B1 B1 [30 × 17] 0.399
## 5 C8 A9 B2 B1 [36 × 17] 0.403
## 6 C8 A9 B3 B1 [36 × 17] 0.371
f3nest <- f3nest %>% mutate(
data = data %>% purrr::map(.f = function(d) {
d$bestlandmatch = bulletxtrctr::bullet_to_land_predict(d$LandA, d$LandB, d$ccf, difference = -1, alpha = 1, addNA = FALSE)
d
})
)
head(f3nest)
## # A tibble: 6 x 6
## # Groups: BarrelA, BarrelB, BulletA, BulletB [6]
## BarrelA BarrelB BulletA BulletB data sam_ccf
## <chr> <chr> <chr> <chr> <list> <dbl>
## 1 A9 A9 B1 B1 <tibble [36 × 18]> 1
## 2 A9 A9 B2 B1 <tibble [36 × 18]> 0.883
## 3 A9 A9 B3 B1 <tibble [36 × 18]> 0.771
## 4 C8 A9 B1 B1 <tibble [30 × 18]> 0.399
## 5 C8 A9 B2 B1 <tibble [36 × 18]> 0.403
## 6 C8 A9 B3 B1 <tibble [36 × 18]> 0.371
f3nest <- f3nest %>% mutate(
data = data %>% purrr::map(.f = function(d) {
d$sameland_pred = bulletxtrctr::bullet_to_land_predict(d$LandA, d$LandB, d$ccf, difference = 0.1, alpha = 0.05, addNA = FALSE)
d
})
)
head(f3nest)
## # A tibble: 6 x 6
## # Groups: BarrelA, BarrelB, BulletA, BulletB [6]
## BarrelA BarrelB BulletA BulletB data sam_ccf
## <chr> <chr> <chr> <chr> <list> <dbl>
## 1 A9 A9 B1 B1 <tibble [36 × 19]> 1
## 2 A9 A9 B2 B1 <tibble [36 × 19]> 0.883
## 3 A9 A9 B3 B1 <tibble [36 × 19]> 0.771
## 4 C8 A9 B1 B1 <tibble [30 × 19]> 0.399
## 5 C8 A9 B2 B1 <tibble [36 × 19]> 0.403
## 6 C8 A9 B3 B1 <tibble [36 × 19]> 0.371
f3nest <- f3nest %>% mutate(
samebarrel_pred = data %>% purrr::map_dbl(.f = function(d)
max(d$sameland_pred))
)
head(f3nest)
## # A tibble: 6 x 7
## # Groups: BarrelA, BarrelB, BulletA, BulletB [6]
## BarrelA BarrelB BulletA BulletB data sam_ccf samebarrel_pred
## <chr> <chr> <chr> <chr> <list> <dbl> <dbl>
## 1 A9 A9 B1 B1 <tibble [36 × 19… 1 1
## 2 A9 A9 B2 B1 <tibble [36 × 19… 0.883 1
## 3 A9 A9 B3 B1 <tibble [36 × 19… 0.771 1
## 4 C8 A9 B1 B1 <tibble [30 × 19… 0.399 0
## 5 C8 A9 B2 B1 <tibble [36 × 19… 0.403 0
## 6 C8 A9 B3 B1 <tibble [36 × 19… 0.371 0
Notes on Newly created features bestlandmatch and sameland_pred are both nested currently in the data column. This column is a tibble. samebarrel_pred is showing the lands for each barrel comparisons of which have the higher sameland_pred scores. It seems to be the first 3 lands between BulletA and BulletB.
##Check Predictions - Now use "bestlandmatch to identify matching lands betweem different bullets from the same barrel
f3long <- f3nest %>%
filter(BarrelA == BarrelB, BulletA != BulletB, BarrelA != "Unknown") %>%
unnest(data) # Only looking at Barrels that are the same
head(f3long)
## # A tibble: 6 x 25
## # Groups: BarrelA, BarrelB, BulletA, BulletB [1]
## BarrelA BarrelB BulletA BulletB Bullet1 Bullet2 ccf cms D
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 A9 A9 B2 B1 A9-B1-1 A9-B2-1 0.539 2 0.0283
## 2 A9 A9 B2 B1 A9-B1-1 A9-B2-2 0.272 4 0.0382
## 3 A9 A9 B2 B1 A9-B1-1 A9-B2-3 0.365 3 0.0335
## 4 A9 A9 B2 B1 A9-B1-1 A9-B2-4 0.187 1 0.0268
## 5 A9 A9 B2 B1 A9-B1-1 A9-B2-5 0.362 5 0.0267
## 6 A9 A9 B2 B1 A9-B1-1 A9-B2-6 0.927 12 0.00757
## # … with 16 more variables: matches <dbl>, mismatches <dbl>,
## # non_cms <dbl>, overlap <dbl>, rough_cor <dbl>, sd_D <dbl>,
## # sum_peaks <dbl>, LandA <chr>, LandB <chr>, samesource <lgl>,
## # samebarrel <lgl>, comparison <chr>, bestlandmatch <lgl>,
## # sameland_pred <lgl>, sam_ccf <dbl>, samebarrel_pred <dbl>
###Let’s Plot some Heat Maps
f3long %>%
ggplot(aes(x = LandA, y = LandB, fill = ccf))+
geom_tile()+
facet_wrap(~BarrelA+BulletA)+
scale_fill_gradient2(low = "darkgrey", high = "darkorange", midpoint = 0.5)+
scale_colour_manual(values = "darkorange")+
geom_point(colour = "midnightblue", data = f3long %>% filter(bestlandmatch))
f3long %>%
ggplot(aes(x = LandA, y = LandB, fill = ccf))+
geom_tile()+
facet_wrap(~BarrelA+BulletA+BulletB)+
scale_fill_gradient2(low = "darkgrey", high = "darkorange", midpoint = 0.5)+
scale_colour_manual(values = "darkorange")+
geom_point(colour = "midnightblue", data = f3long %>% filter(bestlandmatch))
#Distribution of best land matches from f3long
table(f3long$bestlandmatch)
##
## FALSE TRUE
## 1400 280
#Distribution of samesource
table(features$samesource)
##
## FALSE TRUE
## 18642 202
#Distribution of samebarrel
table(features$samebarrel)
##
## FALSE TRUE
## 17642 2882
There seems to be a small amount of missing data. Barrel C8, Bullet B1, Land 3 is missing. Barrel U1, Bullet B3, Land 5 is missing.
Investigate the missing data Barrels C8 and U1 — id: C8-B1-3 and U1-B3-5 are missing in the original phoenix data_set
#Eye-Catchers(Best Land Matches that are not high ccf values):
Barrel F6: B1|B3(4,3), B2|B3(2,3) Barrel L5: B1|B2(1,2), B2|B3(2,1) Barrel M2: B1|B2(1,2), B2|B3(2,5) Barrel R3: B1|B3(4,5)+(5,6) [Weird but also same exact ccf values but differnt bullet comparisons] B2|B3(4,5)+(5,6) Barrel U1: B1|B2(1,3)+(2,4), B1|B3(2,6)
F6-B1-4 vs F6-B3-3: samebarrel = TRUE, bestlandmatch = TRUE, sameland_pred = TRUE, ccf = 0.52, sam_ccf = 0.72, (matches and mismatches both = 18?, overlap = 1.00)
F6-B2-2 vs F6-B3-3: samebarrel = TRUE, beslandmatch = TRUE, sameland_pred = TRUE, ccf = 0.49, sam_ccf = 0.71, (matches = 28, mismatches = 11, overlap = 0.99)
# F6-B1-4 and F6-B3-3
# F6-B2-2 and F6-B3-3
image(Phoenix$x3p[[39]])
image(Phoenix$x3p[[50]])
image(Phoenix$x3p[[43]])
image(Phoenix$x3p[[50]])
# F6-B1-4 and F6-B2-2 seem to be in very bad shape
#### Barrel M2: B1|B2(1,2), B2|B3(2,5)
# M2-B1-1 and M2-B2-2
# M2-B2-2 and M2-B2-5
image(Phoenix$x3p[[72]])
image(Phoenix$x3p[[79]])
image(Phoenix$x3p[[79]])
image(Phoenix$x3p[[82]])
#Lands 1, 2, and 5 seem to have similar damage near the heal of the bullet
#### Barrel R3: B1|B3(4,5)+(5,6) [Weird but also same exact ccf values but differnt bullet comparisons] B2|B3(4,5)+(5,6)
# R3-B1-4 and R3-B3-5
# R3-B1-5 and R3-B3-6
# R3-B2-4 and R3-B3-5
# R3-B2-5 and R3-B3-6
image(Phoenix$x3p[[111]])
image(Phoenix$x3p[[124]])
image(Phoenix$x3p[[112]])
image(Phoenix$x3p[[125]])
image(Phoenix$x3p[[117]])
image(Phoenix$x3p[[124]])
image(Phoenix$x3p[[118]])
image(Phoenix$x3p[[125]])
features <- f3nest %>% unnest(data)
features <- features %>% mutate(
samesource = ifelse(BarrelA == BarrelB & BulletA != BulletB & BarrelA != "Unknown", bestlandmatch, samesource)
)
head(features, 10)
## # A tibble: 10 x 25
## # Groups: BarrelA, BarrelB, BulletA, BulletB [1]
## BarrelA BarrelB BulletA BulletB Bullet1 Bullet2 ccf cms D
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 A9 A9 B1 B1 A9-B1-1 A9-B1-1 1 39 0
## 2 A9 A9 B1 B1 A9-B1-1 A9-B1-2 0.504 2 0.0320
## 3 A9 A9 B1 B1 A9-B1-1 A9-B1-3 0.337 2 0.0341
## 4 A9 A9 B1 B1 A9-B1-1 A9-B1-4 0.339 3 0.0428
## 5 A9 A9 B1 B1 A9-B1-1 A9-B1-5 0.227 2 0.0341
## 6 A9 A9 B1 B1 A9-B1-1 A9-B1-6 0.292 2 0.0303
## 7 A9 A9 B1 B1 A9-B1-2 A9-B1-1 0.504 2 0.0320
## 8 A9 A9 B1 B1 A9-B1-2 A9-B1-2 1 41 0
## 9 A9 A9 B1 B1 A9-B1-2 A9-B1-3 0.367 1 0.0386
## 10 A9 A9 B1 B1 A9-B1-2 A9-B1-4 0.370 2 0.0330
## # … with 16 more variables: matches <dbl>, mismatches <dbl>,
## # non_cms <dbl>, overlap <dbl>, rough_cor <dbl>, sd_D <dbl>,
## # sum_peaks <dbl>, LandA <chr>, LandB <chr>, samesource <lgl>,
## # samebarrel <lgl>, comparison <chr>, bestlandmatch <lgl>,
## # sameland_pred <lgl>, sam_ccf <dbl>, samebarrel_pred <dbl>
#Description of features same source, FALSE and TRUE both increased in entries
table(features$samesource)
##
## FALSE TRUE
## 20042 482
f3nest <- features %>%
group_by(comparison, BarrelA, BulletA, BarrelB,
BulletB, samebarrel, samebarrel_pred, sam_ccf,
Bullet1, Bullet2) %>% nest()
Matches with questioned bullets
Any matches with questioned bullets are based on predictions only. Ground truth can only be determined in collaboration with the owners of the study.
This affects all of the following open questions: -matches between questioned and known bullets - matches between questioned and other questioned bullets - same lands between any questioned bullets woth any other bullets
In comparing known samesource(base on bootstrap) we expect to see orange dots(predicted samesource) for known samesource and gray dots(predicted different source) for known different source comparisons $(*)
f3nest <- f3nest %>% mutate(
truth = c("different-source", "same-source")[samebarrel+1],
truth = ifelse(is.na(samebarrel), "Unknown", truth),
prediction = c("different-source", "same-source")[samebarrel_pred+1]
)
# plot $(*)
f3nest %>% group_by(comparison) %>%
summarise(sam_ccf = mean(sam_ccf),
truth = truth[1],
prediction = prediction[1]) %>%
ggplot(aes(x = sam_ccf, y = truth, colour = prediction))+
geom_jitter()+
theme_bw()+
scale_colour_manual(values = c("darkgrey", "darkorange"))
Graph Analysis Looking at the graph above we can see many points plotted but what is being described here? We have 3 sources which are “same-source”, “different-source”, and “Unknown.”
Our results look very good. We would expect to see same-source points plotted around higher sam_ccf scores and different-source points plotted at the lower end of sam_ccf scores. The Predicted “same-source” points are ploted around the true “same-source” points. The predicted “different-source” points are plotted arund the true “different-source” points. For the true “unknown” barrels in question, we see a mix of predicted “different source” and “same-source” points. There doesn’t seem to be any predicted “different-source”" points plotted around the true “same-source” area and vice versus. These results are exactly what we were hoping for.
#There does not seem to be any recognized truth that is dofferent than the predictions
f3nest %>% filter(truth == "different-source" & prediction == "same-source")
## # A tibble: 0 x 13
## # Groups: BarrelA, BarrelB, BulletA, BulletB, Bullet1, Bullet2,
## # samebarrel, comparison, sam_ccf, samebarrel_pred [0]
## # … with 13 variables: BarrelA <chr>, BarrelB <chr>, BulletA <chr>,
## # BulletB <chr>, Bullet1 <chr>, Bullet2 <chr>, samebarrel <lgl>,
## # comparison <chr>, sam_ccf <dbl>, samebarrel_pred <dbl>,
## # data <list<df[,15]>>, truth <chr>, prediction <chr>
#Filter for questioned bullets
questioned <- f3nest %>% filter(BarrelB == "Unknown")
table(questioned$BarrelA)
##
## A9 C8 F6 L5 M2 P7 R3 U1 Unknown
## 1080 1020 1080 1080 1080 1080 1080 1020 3600
questioned <- questioned %>%
ungroup(BarrelA) %>%
mutate(
BarrelA = factor(factor(BarrelA), levels = c("A9", "C8", "F6", "L5", "M2", "P7", "R3", "U1", "Unknown")),
BulletB = factor(BulletB, levels = rev(c("B", "E", "H", "J", "K", "N", "Q", "T", "Y", "Z"))),
BulletA = factor(BulletA, levels = c("B1", "B2", "B3", rev(levels(BulletB))))
)
questioned %>%
ggplot(aes(x = BulletA, y = BulletB, fill=sam_ccf)) +
geom_tile(data = questioned) +
scale_fill_gradient2(low="darkgrey", high="darkorange", midpoint=.5) +
scale_colour_manual(values=c("darkorange")) +
facet_grid(.~BarrelA, space = "free", scales="free") +
theme_bw() +
geom_point(colour = "midnightblue", data = questioned %>% filter(samebarrel_pred==1))
The figure suggests that mostly, we should be able to accept the same-source predictions for each of the barrels as ground truth. In particular, concistency in the matches between a questioned bullet and a single barrel provide strong evidence in the correctness of a prediction: ideally, we would like to see a match between - a questioned bullet and all three bullets of a single barrel - questioned bullets that match to the same barrel
This is true for all unknown bullets with the exception of bullets Q, Y, and Z. Bullets Q, Y, and Z match to none of the bullets of any barrel.
f3nest <- f3nest %>% ungroup(samebarrel) %>%
mutate(
samebarrel = ifelse(BarrelA == "Unknown" & (BulletA != BulletB),
samebarrel_pred == 1, samebarrel)
)
f3nest <- f3nest %>% mutate(
samebarrel = ifelse(BarrelB == "Unknown" & (BulletA != BulletB),
samebarrel_pred == 1, samebarrel)
)
head(f3nest)
## # A tibble: 6 x 13
## BarrelA BarrelB BulletA BulletB Bullet1 Bullet2 samebarrel comparison
## <chr> <chr> <chr> <chr> <chr> <chr> <lgl> <chr>
## 1 A9 A9 B1 B1 A9-B1-1 A9-B1-1 TRUE A9-B1-1 v…
## 2 A9 A9 B1 B1 A9-B1-1 A9-B1-2 TRUE A9-B1-1 v…
## 3 A9 A9 B1 B1 A9-B1-1 A9-B1-3 TRUE A9-B1-1 v…
## 4 A9 A9 B1 B1 A9-B1-1 A9-B1-4 TRUE A9-B1-1 v…
## 5 A9 A9 B1 B1 A9-B1-1 A9-B1-5 TRUE A9-B1-1 v…
## 6 A9 A9 B1 B1 A9-B1-1 A9-B1-6 TRUE A9-B1-1 v…
## # … with 5 more variables: sam_ccf <dbl>, samebarrel_pred <dbl>,
## # data <list<df[,15]>>, truth <chr>, prediction <chr>
f2 <- f3nest %>% unnest(data)
f2 <- f2 %>% mutate(
samesource = ifelse(is.na(samesource) & !samebarrel, FALSE, samesource),
samesource = ifelse(is.na(samesource) & samebarrel, bestlandmatch, samesource)
)
head(f2)
## # A tibble: 6 x 27
## BarrelA BarrelB BulletA BulletB Bullet1 Bullet2 samebarrel comparison
## <chr> <chr> <chr> <chr> <chr> <chr> <lgl> <chr>
## 1 A9 A9 B1 B1 A9-B1-1 A9-B1-1 TRUE A9-B1-1 v…
## 2 A9 A9 B1 B1 A9-B1-1 A9-B1-2 TRUE A9-B1-1 v…
## 3 A9 A9 B1 B1 A9-B1-1 A9-B1-3 TRUE A9-B1-1 v…
## 4 A9 A9 B1 B1 A9-B1-1 A9-B1-4 TRUE A9-B1-1 v…
## 5 A9 A9 B1 B1 A9-B1-1 A9-B1-5 TRUE A9-B1-1 v…
## 6 A9 A9 B1 B1 A9-B1-1 A9-B1-6 TRUE A9-B1-1 v…
## # … with 19 more variables: sam_ccf <dbl>, samebarrel_pred <dbl>,
## # ccf <dbl>, cms <dbl>, D <dbl>, matches <dbl>, mismatches <dbl>,
## # non_cms <dbl>, overlap <dbl>, rough_cor <dbl>, sd_D <dbl>,
## # sum_peaks <dbl>, LandA <chr>, LandB <chr>, samesource <lgl>,
## # bestlandmatch <lgl>, sameland_pred <lgl>, truth <chr>,
## # prediction <chr>
questioned <- f2 %>% filter(BarrelB == "Unknown")
questioned <- questioned %>%
ungroup(BarrelA) %>%
mutate(
BarrelA = factor(factor(BarrelA), levels = c("A9", "C8", "F6", "L5", "M2", "P7", "R3", "U1", "Unknown")),
BulletB = factor(BulletB, levels = rev(c("B", "E", "H", "J", "K", "N", "Q", "T", "Y", "Z"))),
BulletA = factor(BulletA, levels = c("B1", "B2", "B3", rev(levels(BulletB))))
)
questioned %>%
ggplot(aes(x = LandA, y = LandB, fill=ccf)) + geom_tile() +
scale_fill_gradient2(low="darkgrey", high="darkorange", midpoint=.5) +
facet_grid(BulletB~BarrelA+BulletA) +
geom_point(colour = "red", data = questioned %>% filter(samesource))
table(f2$samesource)
##
## FALSE TRUE
## 40072 732
f2 <- f2 %>% mutate(
samesource = ifelse(questioned$samesource == "TRUE" & f2$samesource == "FALSE", TRUE, samesource)
)
## Warning in questioned$samesource == "TRUE" & f2$samesource == "FALSE":
## longer object length is not a multiple of shorter object length
table(f2$samesource)
##
## FALSE TRUE
## 39447 1357
Accuracy for models is around the same. Now we will try and find miss classified ground truth results by examining signatures between Barrel_Bullet_Land comparisons.
Signatures_Phoenix %>%
filter(Barrel != "Unknown") %>%
ggplot(data = ., aes(x = x, y = sig, color = Land)) +
geom_line()+
facet_grid(paste("Bullet", Bullet) ~ sprintf("Barrel %02s", Barrel))+
theme_bw()+
ggtitle("Signatures for Known Barrels")
## Warning: Removed 6796 rows containing missing values (geom_path).
Signatures_Phoenix %>%
filter(Barrel == "Unknown") %>%
ggplot(data = ., aes(x = x, y = sig, color = Land)) +
geom_line()+
facet_wrap(paste("Bullet", Bullet) ~ sprintf("Barrel %02s", Barrel))+
theme_bw()+
ggtitle("Signatures for Questioned Bullets")
## Warning: Removed 4604 rows containing missing values (geom_path).
# High ccf & samesource('FALSE')
f2 %>%
filter(ccf >= 0.70 & samesource == "FALSE")
## # A tibble: 12 x 27
## BarrelA BarrelB BulletA BulletB Bullet1 Bullet2 samebarrel comparison
## <chr> <chr> <chr> <chr> <chr> <chr> <lgl> <chr>
## 1 P7 L5 B3 B1 L5-B1-3 P7-B3-5 FALSE L5-B1-3 v…
## 2 Unknown L5 J B1 L5-B1-3 Unknow… FALSE L5-B1-3 v…
## 3 U1 L5 B1 B2 L5-B2-2 U1-B1-1 FALSE L5-B2-2 v…
## 4 Unknown M2 B B2 M2-B2-4 Unknow… FALSE M2-B2-4 v…
## 5 L5 P7 B1 B3 P7-B3-5 L5-B1-3 FALSE L5-B1-3 v…
## 6 Unknown P7 T B3 P7-B3-5 Unknow… FALSE P7-B3-5 v…
## 7 L5 U1 B2 B1 U1-B1-1 L5-B2-2 FALSE L5-B2-2 v…
## 8 M2 Unknown B2 B Unknow… M2-B2-4 FALSE M2-B2-4 v…
## 9 L5 Unknown B1 J Unknow… L5-B1-3 FALSE L5-B1-3 v…
## 10 Unknown Unknown T J Unknow… Unknow… FALSE Unknown-J…
## 11 P7 Unknown B3 T Unknow… P7-B3-5 FALSE P7-B3-5 v…
## 12 Unknown Unknown J T Unknow… Unknow… FALSE Unknown-J…
## # … with 19 more variables: sam_ccf <dbl>, samebarrel_pred <dbl>,
## # ccf <dbl>, cms <dbl>, D <dbl>, matches <dbl>, mismatches <dbl>,
## # non_cms <dbl>, overlap <dbl>, rough_cor <dbl>, sd_D <dbl>,
## # sum_peaks <dbl>, LandA <chr>, LandB <chr>, samesource <lgl>,
## # bestlandmatch <lgl>, sameland_pred <lgl>, truth <chr>,
## # prediction <chr>
Signatures_Phoenix %>%
filter(id %in% c("L5-B1-3", "P7-B3-5")) %>% #Not a Match
ggplot(data = ., aes(x = x, y = sig, color = Bullet)) +
geom_line()+
theme_bw()+
scale_color_brewer(palette = "Dark2")+
ggtitle("L5-B1-3 VS P7-B3-5")
## Warning: Removed 1924 rows containing missing values (geom_path).
Signatures_Phoenix %>%
filter(id %in% c("L5-B1-3", "Unknown-J-5")) %>% #Possibly a Match(sum peaks suggest strong match)
ggplot(data = ., aes(x = x, y = sig, color = Bullet)) +
geom_line()+
theme_bw()+
scale_color_brewer(palette = "Dark2")+
ggtitle("L5-B1-3 VS Unknown-J-5")
## Warning: Removed 1411 rows containing missing values (geom_path).
Signatures_Phoenix %>%
filter(id %in% c("L5-B2-2", "U1-B1-1")) %>% #Possibly a Match(sum peaks suggest strong match)
ggplot(data = ., aes(x = x, y = sig, color = Bullet)) +
geom_line()+
theme_bw()+
scale_color_brewer(palette = "Dark2")+
ggtitle("L5-B2-2 vs U1-B1-1")
## Warning: Removed 1774 rows containing missing values (geom_path).
Signatures_Phoenix %>%
filter(id %in% c("M2-B2-4", "Unknown-B-2")) %>% #Match(sum peaks suggest strong match)
ggplot(data = ., aes(x = x, y = sig, color = Bullet)) +
geom_line()+
theme_bw()+
scale_color_brewer(palette = "Dark2")+
ggtitle("M2-B2-4 vs Unknown-B-2")
## Warning: Removed 1765 rows containing missing values (geom_path).
Signatures_Phoenix %>%
filter(id %in% c("P7-B3-5", "Unknown-T-3")) %>% #Not a Match
ggplot(data = ., aes(x = x, y = sig, color = Bullet)) +
geom_line()+
theme_bw()+
scale_color_brewer(palette = "Dark2")+
ggtitle("P7-B3-5 vs Unknown-T-3")
## Warning: Removed 1882 rows containing missing values (geom_path).
Signatures_Phoenix %>%
filter(id %in% c("Unknown-J-5", "Unknown-T-3")) %>% #Not a Match
ggplot(data = ., aes(x = x, y = sig, color = Bullet)) +
geom_line()+
theme_bw()+
scale_color_brewer(palette = "Dark2")+
ggtitle("Unknown-J-5 vs Unknown-T-3")
## Warning: Removed 1369 rows containing missing values (geom_path).
# Analyze signatures for these comparisons
f2 <- f2 %>% mutate(
samesource = ifelse(f2$comparison == "M2-B2-4 vs Unknown-B-2" & f2$samesource == "FALSE", TRUE, samesource)
)